Skip to content

Conversation

@petar-avramovic
Copy link
Collaborator

No description provided.

Copy link
Collaborator Author

petar-avramovic commented Nov 17, 2025

@llvmbot
Copy link
Member

llvmbot commented Nov 17, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Petar Avramovic (petar-avramovic)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/168410.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp (+8)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll (-7)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll (-6)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 907f8300de6d2..396d64625fb5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -173,6 +173,14 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
   if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
     return RALSrc;
 
+  // Aext = G_ANYEXT RALSrc
+  // RAL = G_AMDGPU_READANYLANE Aext
+  // Src = G_TRUNC RAL
+  if (mi_match(Src, MRI,
+               m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) {
+    return RALSrc;
+  }
+
   // TruncSrc = G_AMDGPU_READANYLANE RALSrc
   // AextSrc = G_TRUNC TruncSrc
   // Src = G_ANYEXT AextSrc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
index 4361e5c113708..27005e7aa175e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
@@ -1070,9 +1070,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
 ; GFX11-True16-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX11-True16-NEXT:    ds_load_u16_d16 v1, v1
 ; GFX11-True16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-True16-NEXT:    v_readfirstlane_b32 s0, v1
-; GFX11-True16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-True16-NEXT:    v_mov_b16_e32 v1.l, s0
 ; GFX11-True16-NEXT:    ds_store_b16 v0, v1
 ; GFX11-True16-NEXT:    s_endpgm
 ;
@@ -1089,10 +1086,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
 ; GFX12-True16-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX12-True16-NEXT:    ds_load_u16_d16 v1, v1
 ; GFX12-True16-NEXT:    s_wait_dscnt 0x0
-; GFX12-True16-NEXT:    v_readfirstlane_b32 s0, v1
-; GFX12-True16-NEXT:    s_wait_alu 0xf1ff
-; GFX12-True16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-True16-NEXT:    v_mov_b16_e32 v1.l, s0
 ; GFX12-True16-NEXT:    ds_store_b16 v0, v1
 ; GFX12-True16-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
index bf36deac33380..9bf140cf744db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
@@ -13,9 +13,6 @@ define amdgpu_ps void @load_uniform_P1_i16_gfx12(ptr addrspace(1) inreg %ptra, p
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    global_load_d16_b16 v2, v2, s[0:1]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -312,9 +309,6 @@ define amdgpu_ps void @load_uniform_P4_i16_gfx12(ptr addrspace(4) inreg %ptra, p
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    global_load_d16_b16 v2, v2, s[0:1]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX11-NEXT:    s_endpgm
 ;

@llvmbot
Copy link
Member

llvmbot commented Nov 17, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: Petar Avramovic (petar-avramovic)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/168410.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp (+8)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll (-7)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll (-6)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 907f8300de6d2..396d64625fb5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -173,6 +173,14 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
   if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
     return RALSrc;
 
+  // Aext = G_ANYEXT RALSrc
+  // RAL = G_AMDGPU_READANYLANE Aext
+  // Src = G_TRUNC RAL
+  if (mi_match(Src, MRI,
+               m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) {
+    return RALSrc;
+  }
+
   // TruncSrc = G_AMDGPU_READANYLANE RALSrc
   // AextSrc = G_TRUNC TruncSrc
   // Src = G_ANYEXT AextSrc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
index 4361e5c113708..27005e7aa175e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
@@ -1070,9 +1070,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
 ; GFX11-True16-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX11-True16-NEXT:    ds_load_u16_d16 v1, v1
 ; GFX11-True16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-True16-NEXT:    v_readfirstlane_b32 s0, v1
-; GFX11-True16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-True16-NEXT:    v_mov_b16_e32 v1.l, s0
 ; GFX11-True16-NEXT:    ds_store_b16 v0, v1
 ; GFX11-True16-NEXT:    s_endpgm
 ;
@@ -1089,10 +1086,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
 ; GFX12-True16-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX12-True16-NEXT:    ds_load_u16_d16 v1, v1
 ; GFX12-True16-NEXT:    s_wait_dscnt 0x0
-; GFX12-True16-NEXT:    v_readfirstlane_b32 s0, v1
-; GFX12-True16-NEXT:    s_wait_alu 0xf1ff
-; GFX12-True16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-True16-NEXT:    v_mov_b16_e32 v1.l, s0
 ; GFX12-True16-NEXT:    ds_store_b16 v0, v1
 ; GFX12-True16-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
index bf36deac33380..9bf140cf744db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
@@ -13,9 +13,6 @@ define amdgpu_ps void @load_uniform_P1_i16_gfx12(ptr addrspace(1) inreg %ptra, p
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    global_load_d16_b16 v2, v2, s[0:1]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -312,9 +309,6 @@ define amdgpu_ps void @load_uniform_P4_i16_gfx12(ptr addrspace(4) inreg %ptra, p
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    global_load_d16_b16 v2, v2, s[0:1]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX11-NEXT:    s_endpgm
 ;

@github-actions
Copy link

github-actions bot commented Nov 17, 2025

🐧 Linux x64 Test Results

  • 186291 tests passed
  • 4853 tests skipped

@petar-avramovic petar-avramovic force-pushed the users/petar-avramovic/readanylane-combine branch from 3590a6e to 9e70882 Compare November 18, 2025 13:59
Copy link
Contributor

@vangthao95 vangthao95 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would we want a .mir test for this? Otherwise LGTM.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants